import re
from collections import Counter

import data_interest

# Regex matching one interest term: a run of CJK ideographs, word characters,
# and the punctuation - / ( ) % \ +
pattern = re.compile(r'[\u4e00-\u9fa5\w\-/\(\)\%\\\+]+')

# Count how often each interest term occurs within each record and write one
# dict per record, one per line, to interest-count.txt.
# The `with` block closes the file even if a record raises midway; the explicit
# UTF-8 encoding keeps CJK terms writable regardless of the platform's default.
with open('interest-count.txt', 'w', encoding='utf-8') as fp:
    for item in data_interest.data:
        # dict(Counter(...)) keeps first-occurrence key order and the plain
        # dict repr, so each output line has the same format as before.
        interest_dict = dict(Counter(pattern.findall(str(item))))
        print(interest_dict, file=fp)